Chapter 4 MAGs overview

rm(list=ls()) #clear environment
load("data/squirrels_data.Rdata")

4.1 MAGs phylogeny

# Which phylum the MAG belongs to
phyla <- ehi_phylum_colors %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
  arrange(match(genome, genome_tree$tip.label)) %>%
  select(phylum, colors) %>%
  unique()

# What is the genome size of the MAG in MBs (megabases)
lengths <- genome_metadata %>%
  select(c(genome,length)) %>%
  mutate(length=round(length/1000000,2))

# What is the completeness of the MAG
mag_completeness <- genome_metadata %>%
  select(c(genome,completeness)) %>%
  as.data.frame() %>%
  remove_rownames() %>%
  column_to_rownames(var = "genome")


# Generate the phylum color heatmap
phylum_heatmap <- ehi_phylum_colors %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
  arrange(match(genome, genome_tree$tip.label)) %>%
  select(genome,phylum) %>%
  mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
  column_to_rownames(var = "genome")

# Create baseline circular genome tree
circular_tree <- force.ultrametric(genome_tree,method="extend") %>%
  ggtree(., layout = 'circular', size = 0.1, angle=45) +
  xlim(-1, NA)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors ring
circular_tree <- gheatmap(circular_tree, phylum_heatmap, offset=0.05, width=0.3, colnames=FALSE) +
  scale_fill_manual(values=colors_alphabetic, name="Phylum") +
  #geom_tiplab2(size=1, hjust=-0.1) +
  theme(legend.position = "right", plot.margin = margin(0, 0, 0, 0), panel.margin = margin(0, 0, 0, 0))

# Flush color scale to enable a new color scheme in the next ring
circular_tree <- circular_tree + new_scale_fill()

# Add completeness ring
circular_tree <-  circular_tree +
  new_scale_fill() +
  scale_fill_gradient(low = "#d1f4ba", high = "#f4baba", name="Genome\ncontamination") +
  geom_fruit(
    data=genome_metadata,
    geom=geom_bar,
    mapping = aes(x=completeness, y=genome, fill=contamination),
    offset = 0.55,
    orientation="y",
    stat="identity")

# Add genome-size ring
circular_tree <-  circular_tree +
  new_scale_fill() +
  scale_fill_manual(values = "#cccccc") +
  geom_fruit(
    data=lengths,
    geom=geom_bar,
    mapping = aes(x=length, y=genome),
    offset = 0.05,
    orientation="y",
    stat="identity")


#Plot circular tree
circular_tree

4.2 Genome quality

genome_metadata$completeness %>% mean()
[1] 84.89347
genome_metadata$completeness %>% sd()
[1] 15.4699
genome_metadata$contamination %>% mean()
[1] 2.013071
genome_metadata$contamination %>% sd()
[1] 2.10322
#create input table from original genome table
genome_details <- genome_metadata %>%
  select(c(genome,domain,phylum,completeness,contamination,length)) %>%
  mutate(length=round(length/1000000,2)) %>% #change length to MBs
  rename(comp=completeness,cont=contamination,size=length) %>% #rename columns
  remove_rownames() %>%
  arrange(match(genome, rev(genome_tree$tip.label))) #sort MAGs according to phylogenetic tree

#generate genome quality biplot
genome_stats_biplot <- genome_details %>%
  ggplot(aes(x=comp,y=cont,size=size,color=phylum)) +
  geom_point(alpha=0.7) +
  ylim(c(10,0)) +
  scale_color_manual(values=colors_alphabetic) +
  labs(y= "Contamination", x = "Completeness") +
  theme_classic() +
  theme(legend.position = "none")

#generate contamination boxplot
genome_stats_cont <- genome_details %>%
  ggplot(aes(y=cont)) +
  ylim(c(10,0)) +
  geom_boxplot(colour = "#999999", fill="#cccccc") +
  theme_void() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        plot.margin = unit(c(0, 0, 0.40, 0),"inches")) #add bottom-margin (top, right, bottom, left)

#generate completeness boxplot
genome_stats_comp <-genome_details %>%
  ggplot(aes(x=comp)) +
  xlim(c(50,100)) +
  geom_boxplot(colour = "#999999", fill="#cccccc") +
  theme_void() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        plot.margin = unit(c(0, 0, 0, 0.50),"inches")) #add left-margin (top, right, bottom, left)

#create composite figure
grid.arrange(grobs = list(genome_stats_comp,genome_stats_biplot,genome_stats_cont),
             layout_matrix = rbind(c(1,1,1,1,1,1,1,1,1,1,1,4),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3),
                                   c(2,2,2,2,2,2,2,2,2,2,2,3)))

4.3 Functional attributes of MAGs

#Generate a basal utrametric tree for the sake of visualisation
gift_tree <- force.ultrametric(genome_tree,method="extend") %>%
   ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
gift_tree <- gheatmap(gift_tree, phylum_heatmap, offset=0, width=0.1, colnames=FALSE, color=NA) +
   scale_fill_manual(values=colors_alphabetic) +
    labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
gift_tree <- gift_tree + new_scale_fill()

gift_table <- genome_gifts %>%
  column_to_rownames(var="genome")

#Add functions heatmap
gift_tree <- gheatmap(gift_tree, gift_table, offset=0.5, width=3.5, colnames=FALSE, color=NA) +
  vexpand(.08) +
  coord_cartesian(clip = "off") +
  scale_fill_gradient(low = "#f4f4f4", high = "steelblue", na.value="white")+
  labs(fill="GIFT")

#Reset fill scale to use a different colour profile in the heatmap
gift_tree <- gift_tree + new_scale_fill()

# Add completeness barplots
gift_tree <- gift_tree +
            geom_fruit(data=genome_metadata,
            geom=geom_bar,
            #grid.params=list(axis="x", text.size=2, nbreak = 1),
            axis.params=list(vline=TRUE),
            mapping = aes(x=length, y=genome, fill=completeness),
                 offset = 3.8,
                 orientation="y",
                 stat="identity") +
            scale_fill_gradient(low = "#cf8888", high = "#a2cc87") +
            labs(fill="Genome\ncompleteness")

#Plot combined tree + heatmap
gift_tree +
  theme(legend.position='right')

4.4 Functional ordination of MAGs (distillr)

distillr_table <- genome_gifts %>%
  column_to_rownames(var="genome")

# Generate the tSNE ordination
tSNE_func2 <- Rtsne(X=distillr_table, dims = 2, check_duplicates = FALSE)

# Plot the ordination
tSNE_func2$Y %>%
  as.data.frame() %>%
  mutate(genome=rownames(distillr_table)) %>%
  inner_join(genome_metadata, by="genome") %>%
  rename(tSNE1="V1", tSNE2="V2") %>%
  select(genome,phylum,tSNE1,tSNE2, completeness) %>%
  ggplot(aes(x = tSNE1, y = tSNE2, color = phylum, size=completeness))+
  geom_point(shape=16, alpha=0.7) +
  scale_color_manual(values=colors_alphabetic) +
  theme_minimal() +
  theme(legend.position = "right")